# Install pacman on first use. Only its presence is checked here: the setup
# code calls it as `pacman::p_load()`, so the package does not need to be
# attached, and `requireNamespace()` avoids using `require()` as an
# install test.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
Loading required package: pacman
# Load (installing when missing) every package used in this document.
pacman::p_load(
  tidyverse, ggridges, glue, scales, ggthemes,
  openintro, ggrepel, dsbox, janitor, fs
)

# set theme for ggplot2
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

# set width of code output
options(width = 85)

# set figure parameters for knitr
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

### All responses are in comments within the code
1 - A new day, a new plot, a new geom
# Ridge plot of Airbnb review scores per Edinburgh neighbourhood, with the
# neighbourhoods ordered by their median review score.

# read from dsbox pkg dataset
# glimpse(edibnb)

# Keep only listings that have both a neighbourhood and a review score.
edibnb <- edibnb |>
  filter(!is.na(neighbourhood), !is.na(review_scores_rating))

# got the median review score per neighborhood, sorted ascending
median_rating <- edibnb |>
  group_by(neighbourhood) |>
  summarise(
    median_review_score = median(review_scores_rating, na.rm = TRUE)
  ) |>
  arrange(median_review_score)
# glimpse(median_rating)

# ordering: factor levels follow the ascending median computed above
neighborhood_order <- edibnb |>
  mutate(
    neighbourhood = factor(neighbourhood, levels = median_rating$neighbourhood)
  )

ggplot(neighborhood_order, aes(x = review_scores_rating, y = neighbourhood)) +
  geom_density_ridges(alpha = 0.7) +
  theme(
    legend.position = "none",
    plot.subtitle = element_text(size = 8)
  ) +
  labs(
    title = "Airbnb Review of Edinburgh Neighborhood",
    x = "Review Score (0–100)",
    y = "Neighborhood",
    subtitle = "DensityRidge Plot"
  )
Picking joint bandwidth of 1.23
This plot shows the distribution of Airbnb review scores across different neighborhoods in Edinburgh. Each ridge represents a neighborhood and illustrates how review scores are spread out for listings in that area. The highest-scoring neighborhoods tend to have a “taller” distribution curve, implying a tighter spread of high review scores compared to the bottom few.
2 - Foreign Connected PACs
# Import the "Foreign Connected PAC" CSV files, tidy them into one row per
# PAC / year / party, and plot the yearly totals for UK-connected PACs.

# get a list of files with "Foreign Connected PAC" in their names
list_of_files <- dir_ls(path = "data", regexp = "Foreign Connected PAC")

# read all files and bind; `id = "year"` stores each row's source file path
pac <- read_csv(list_of_files, id = "year")

# Clean the column names, split country from parent company, and derive a
# numeric year and numeric dollar amounts.
cleaned_pac <- clean_names(pac) |>
  separate(
    country_of_origin_parent_company,
    into = c("country_of_origin", "parent_company"),
    sep = "/",
    # keep any further "/" inside the parent company name instead of
    # silently discarding the remainder
    extra = "merge"
  ) |>
  mutate(
    # the text after the first "-" in the file path, with ".csv" removed,
    # is taken as the year
    year_suffix = str_split(year, "-", simplify = TRUE)[, 2],
    year_string = str_remove(year_suffix, fixed(".csv")),
    year = as.integer(year_string),
    # drop the "$" sign so the amounts can be converted to numbers
    repubs = as.numeric(str_remove(repubs, "\\$")),
    dems = as.numeric(str_remove(dems, "\\$"))
  ) |>
  select(-year_suffix, -year_string, -total)
# glimpse(cleaned_pac)

# One row per PAC, year and party, with readable party labels.
updated_pac_data <- cleaned_pac |>
  pivot_longer(
    cols = c(dems, repubs),
    names_to = "party",
    values_to = "amount"
  ) |>
  mutate(
    party = case_when(
      party == "dems" ~ "Democrat",
      party == "repubs" ~ "Republican"
    )
  )

# Yearly totals per party for UK-connected PACs.
uk_pac <- updated_pac_data |>
  filter(country_of_origin == "UK") |>
  group_by(year, party) |>
  summarise(total_amount = sum(amount), .groups = "drop")
# glimpse(uk_pac)

# Amounts are plotted in millions of dollars.
ggplot(uk_pac, aes(x = year, y = total_amount / 1000000, color = party)) +
  geom_line(linewidth = 1.1) +
  labs(
    y = "Total amount",
    x = "Year",
    title = "UK-connected PACs' Contributions to US political parties"
  ) +
  scale_y_continuous(labels = dollar_format(prefix = "$", suffix = "M")) +
  scale_color_manual(
    name = "Party",
    values = c("Democrat" = "blue", "Republican" = "red")
  )
# Yearly totals per party for Mexico-connected PACs, plotted the same way as
# the UK figure.
Mexico_pac <- updated_pac_data |>
  filter(country_of_origin == "Mexico") |>
  group_by(year, party) |>
  # drop the grouping explicitly, matching the UK summary, so the result is
  # an ungrouped tibble and no regrouping message is printed
  summarise(total_amount = sum(amount), .groups = "drop")
# glimpse(Mexico_pac)

# Amounts are plotted in millions of dollars.
ggplot(Mexico_pac, aes(x = year, y = total_amount / 1000000, color = party)) +
  geom_line(linewidth = 1.1) +
  labs(
    y = "Total amount",
    x = "Year",
    title = "Mexico-connected PACs' Contributions to US political parties"
  ) +
  scale_color_manual(
    name = "Party",
    values = c("Democrat" = "blue", "Republican" = "red")
  ) +
  scale_y_continuous(labels = dollar_format(prefix = "$", suffix = "M"))
I chose Mexico for the follow-up plot. Unlike the UK, Mexico has traditionally been a minor contributor in dollar terms. There also appears to be a sharp increase in contributions to Republicans around 2008–10, followed by a sharp decline in later years.